* Reset settings and initialize log file
launch, path("share/stocks_flows")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Decompose changes in EPOP into inflow/outflow components.
*-------------------------------------------------------------------------------


* Prepare data and run models
*-------------------------------------------------------------------------------

if "$estimate" != "0" {
	* Load data on adult individuals
	gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

	* Retain variables we need
	keep pid tm year month wtfinl wtraked tmspline* weeks female emp

	* Create flow indicators
	gen byte inflow = (L.emp == 0 & emp == 1) if !missing(wtraked)
	gen byte outflow = (L.emp == 1 & emp == 0) if !missing(wtraked)

	* Store the data
	tempfile core
	save `core'

	* Compute aggregate flows
	gcollapse (mean) inflow outflow (rawsum) wtraked [pw = wtraked], by(female tm)
	tempfile flows
	save `flows'

	* Compute aggregate stocks and weights
	use `core', clear
	gcollapse (mean) emp (rawsum) wtfinl (first) month tmspline* weeks [pw = wtfinl], by(female tm)

	* Combine stocks and flows
	merge 1:1 female tm using `flows', assert(1 3) nogenerate
	tsset female tm

	* Loop over sex
	foreach f of numlist 0 1 {
		* Loop over outcome variables
		foreach yvar of varlist emp inflow outflow {
			* Specify controls and weights
			if "`yvar'" == "emp" {
				local controls tmspline* weeks
				local wtvar wtfinl
			}
			else {
				local controls D.tmspline* D.weeks
				local wtvar wtraked
			}

			* Run the specification
			quietly ivreg2 `yvar' ib5.month `controls' if female == `f' [aw = `wtvar'], bw($bandwidth) robust small
			process_estimates, path("stocks_flows") model("f`f'_`yvar'")
		}
	}
}


* Decompose stock effects into inflow/outflow components
*-------------------------------------------------------------------------------

if "$estimate" != "0" {
	* Load estimates into memory
	load_estimates, path("stocks_flows")

	* Prepare coefficient labels for each month
	make_coeflabels

	* Loop over sexes
	foreach f of numlist 0 1 {
		* Process estimates from the stock specification
		estimates restore f`f'_emp
		matrix B = e(b)
		clear
		svmat B, names(col)
		gen female = `f'
		reshape long coef, i(female) j(month_adj)
		rename coef emp
		gen demp = emp - emp[_n - 1]
		keep female month_adj demp
		keep if inrange(month_adj, 1, 12)
		tempfile f`f'_emp
		save "`f`f'_emp'"

		* Loop over inflows/outflows
		foreach x in "inflow" "outflow" {
			estimates restore f`f'_`x'

			* Take the average of the estimated coefficients
			local beta_bar "1/12 * (_b[coef1]"
			forvalues m = 2/12 {
				local beta_bar `"`beta_bar' + _b[coef`m']"'
			}
			local beta_bar `"`beta_bar' )"'

			* Compute excess flows, relative to the monthly average
			local coeflist
			forvalues m = 1/12 {
				local coeflist "`coeflist' (`x'`m' = _b[coef`m'] - `beta_bar')"
			}

			* Transform estimates
			quietly xlincom `coeflist', post

			* Store estimates
			matrix B = e(b)
			clear
			svmat B, names(col)
			gen female = `f'
			reshape long `x', i(female) j(month_adj)
			tempfile f`f'_`x'
			save "`f`f'_`x''"
		}

		* Combine stock and flow estimates
		use "`f`f'_emp'", clear
		merge 1:1 female month_adj using "`f`f'_inflow'", assert(3) nogenerate
		merge 1:1 female month_adj using "`f`f'_outflow'", assert(3) nogenerate

		* Reshape
		rename (demp inflow outflow) (betademp betainflow betaoutflow)
		reshape long beta, i(female month_adj) j(measure) string

		* Store the estimates
		tempfile f`f'
		save `f`f''
	}

	* Stack estimates across sexes and outcomes
	clear
	append using `f0'
	append using `f1'

	* Save plotting points
	order female measure month_adj beta
	sort female measure month_adj
	compress
	save "$basepath/models/stocks_flows/plotting_points.dta", replace
}


* Prepare for plotting
*-------------------------------------------------------------------------------

* Load the plotting points
use "$basepath/models/stocks_flows/plotting_points.dta", clear

* Fix time convention (putting April => May changes first)
replace month_adj = mod(month_adj + 1, 12) + 12 * (mod(month_adj + 1, 12) == 0)
sort female measure month_adj

label define month_adj_lbl 1 "April -> May", replace
label define month_adj_lbl 2 "May -> June", add
label define month_adj_lbl 3 "June -> July", add
label define month_adj_lbl 4 "July -> Aug.", add
label define month_adj_lbl 5 "Aug. -> Sept.", add
label define month_adj_lbl 6 "Sept. -> Oct.", add
label define month_adj_lbl 7 "Oct. -> Nov.", add
label define month_adj_lbl 8 "Nov. -> Dec.", add
label define month_adj_lbl 9 "Dec. -> Jan.", add
label define month_adj_lbl 10 "Jan. -> Feb.", add
label define month_adj_lbl 11 "Feb. -> Mar.", add
label define month_adj_lbl 12 "Mar. -> Apr.", add
label values month_adj month_adj_lbl

* Flip the sign on outflows
replace beta = -1 * beta if measure == "outflow"

* Express effects in percentage terms
replace beta = 100 * beta

* Compute sum of inflow and outflow components
rename beta beta_
reshape wide beta_, i(female month_adj) j(measure) string
rename beta_* *
gen sumflows = inflow + outflow

* Compare stock and flow estimates
list female month_adj demp sumflows if female == 0
list female month_adj demp sumflows if female == 1

* Report the contributions from inflows and outflows between May and July
egen contribution_inflows = total(inflow * (female == 1 & inlist(month_adj, 2, 3)))
egen contribution_outflows = total(outflow * (female == 1 & inlist(month_adj, 2, 3)))
display "Inflows: " %9.2f contribution_inflows
display "Outflows: " %9.2f contribution_outflows


* Plot the decomposition results
*-------------------------------------------------------------------------------

* Bar positioning
gen curr_pos = 0
gen curr_neg = 0

foreach v in "inflow" "outflow" {
	gen `v'0 = .
	replace `v'0 = curr_pos if sign(`v') >= 0
	replace `v'0 = curr_neg if sign(`v') < 0

	gen `v'1 = `v'0 + `v'
	replace curr_pos = curr_pos + `v' if `v' > 0
	replace curr_neg = curr_neg + `v' if `v' < 0
}

drop curr_*

* Prepare shading
gen rlow = -1
gen rupp = +1
gen rval = month_adj - 0.5

* Plot estimates
foreach f of numlist 0 1 {
	if `f' == 0 local flbl "Men"
	if `f' == 1 local flbl "Women"

	#delimit ;
	twoway
		(rarea rupp rlow rval if female == `f' & inrange(month_adj, 1, 6), color($ltgs) plotregion(margin(t=0 b=0 l=0)))
		(rbar inflow0 inflow1 month_adj if female == `f', color(gs11) lcolor(black) lwidth(vthin))
		(rbar outflow0 outflow1 month_adj if female == `f', color(gs6) lcolor(black) lwidth(vthin))
		(connected sumflows month_adj if female == `f', color(black) msymbol(circle) clwidth(medthick)),
		title("{bf:`flbl'}")
		xtitle("")
		xscale(range(0.5 12.5) titlegap(*-30))
		xlabel(
			1 "Apr. {&rarr} May"
			2 "May {&rarr} June"
			3 "June {&rarr} July"
			4 "July {&rarr} Aug."
			5 "Aug. {&rarr} Sept."
			6 "Sept. {&rarr} Oct."
			7 "Oct. {&rarr} Nov."
			8 "Nov. {&rarr} Dec."
			9 "Dec. {&rarr} Jan."
			10 "Jan. {&rarr} Feb."
			11 "Feb. {&rarr} Mar."
			12 "Mar. {&rarr} Apr.",
			labsize(*0.9) angle(vertical))
		ytitle("")
		yscale(range(-1 1))
		ylabel(-1(0.5)1)
		yline(0)
		graphregion(margin(small))
		plotregion(margin(zero))
		legend(rows(1) order(4 2 3) size(*.75) label(2 "Inflow component") label(3 "Outflow component") label(4 "Net change in EPOP"));
	#delimit cr

	tempfile g`f'
	graph save "`g`f''"
}

grc1leg "`g1'" "`g0'", l1title("Difference relative to May (p.p.)", size(*.8)) imargin(small)
nicepdf "$basepath/output/stocks_flows.pdf", panels(2) indirect replace

* Close the log file
unlaunch
